**READ IN UNEMPLOYMENT RATES AND HOUSE PRICES, COUNTY LEVEL, MATCH TO ZIP CODES***

/* data source is BLS LAU (downloaded: dec 19 2014)
FHFA HPI, all transactions, metro and non metro versions (downloaded: may 28 2014)
Census median home prices in 2000 (downloaded: may 28 2014)
Various geo crosswalks: zip to county, msa to county, from census 2000

*/


cd $datafolder
*---------------------------------------------------------------* 
*----COUNTY LEVEL BLS Unemployment Data------------------------*
*----------------------------------------------------------------*
**NOTE: mostly at county Level, but not for some large counties; see appendix to Dettling and Kearney JPUBEC 2014 for more**



insheet using "all_geocodes_v2009.txt", clear
drop if v2==""
keep if v1=="050"
rename v7 countyname
destring(v2), replace
destring(v3), replace
rename v3 countyfip
rename v2 statefip
keep countyname statefip countyfip

save "countyfips.dta", replace

set more off

**These are large data files that be downloaded directly from BLS; We provide the processed files, code to process raw files is commented out.

/*
foreach n in 9599 0004 0509 {
set more off
disp "insheet using `n'.txt clear"
insheet using "la_data_0_CurrentU`n'.txt", clear
disp "keep if period"
keep if period=="M03"|period=="M06"|period=="M09"|period=="M12"
gen q = .
	replace q = 1 if period=="M03"
	replace q = 2 if period=="M06"
	replace q = 3 if period=="M09"
	replace q = 4 if period=="M12"
gen mo = .
	replace mo = 3 if period=="M03"
	replace mo = 6 if period=="M06"
	replace mo = 9 if period=="M09"
	replace mo = 12 if period=="M12"
disp "quarter = "
gen quarter = year*100 + mo

drop year q mo period
reshape wide value footnote, i(series_id) j(quarter)
save "lau`n'.dta", replace 
}

use "lau0509.dta", clear
reshape long value footnote, i(series_id) j(quarter)
replace value="" if value=="-"
destring(value), replace
reshape wide value footnote, i(series_id) j(quarter)
save "lau0509.dta", replace
*/



/* get series id# and corresponding measure, with area type and area code http://download.bls.gov/pub/time.series/la/la.series */
*insheet using "lauareacodes.txt", clear
insheet using "la_series.txt", clear
keep  series_id  area_type_code area_code  measure series_title
save "series2.dta", replace

/* get area names corresponding to area code from http://download.bls.gov/pub/time.series/la/la.area */
*insheet using "lauarea.txt", clear
insheet using "la_area.txt", clear
keep  area_code area_text
bysort area_code: gen n=_n
keep if n==1
drop n
save "areanames2.dta", replace

use "series2.dta", clear
merge m:1 area_code using "areanames2.dta"
drop if _merge==2
drop _merge

keep if measure_code==3
keep if area_type_code=="F"
egen countyname=ends(area_text), punct(,) head

gen statefip = substr(series_id,6,2) 
/*egen statefip=ends(area_code), punct(CN) tail
egen statefip2=ends(area_code), punct(PA) tail
egen statefip3=ends(area_code), punct(PS) tail
replace statefip=statefip2 if statefip==""
replace statefip=statefip3 if statefip=="" */
destring(statefip), replace
*replace statefip=floor(statefip/10000) 
*drop statefip2 statefip3


*Fix some names (some places in alaska have weird names, combined/split), drop puerto rico*
drop if statefip==43
replace countyname="Anchorage Municipality" if countyname=="Anchorage Borough/municipality"
replace countyname="Broomfield County" if countyname=="Broomfield County/city"
replace countyname="Denver County" if countyname=="Denver County/city"
replace countyname="Doña Ana County" if countyname=="Dona Ana County"
replace countyname="Honolulu County" if countyname=="Honolulu County/city"
replace countyname="Juneau City and Borough" if countyname=="Juneau Borough/city"
replace countyname="Nantucket County" if countyname=="Nantucket County/town"
replace countyname="Philadelphia County" if countyname=="Philadelphia County/city"
replace countyname="San Francisco County" if countyname=="San Francisco County/city"
replace countyname="Sitka City and Borough" if countyname=="Sitka Borough/city"
replace countyname="Skagway Municipality" if countyname=="Skagway Borough/municipality"
replace countyname="Yakutat City and Borough" if countyname=="Yakutat Borough/city"
**IN Alaska, some issues over time, use largest town for pre-split data
replace countyname="Skagway Municipality" if countyname=="Skagway-Hoonah-Angoon Census Area"
replace countyname="Wrangell City and Borough" if countyname=="Wrangell-Petersburg Census Area"
replace countyname="Ketchikan Gateway Borough" if countyname=="Prince of Wales-Outer Ketchikan Census Area"



sort statefip countyname
merge m:1 statefip countyname using "countyfips.dta"
drop _merge
**fix alaska, hi that were split/combined
replace series_id="LAUPA15015003" if countyname=="Kalawao County" /* no longer shows up >2005*/
replace series_id="LAUCN02280003" if countyname=="Petersburg Census Area" /* new series ID is for Wrangell-Petersburg Census Area, AK */
replace series_id="LAUCN022010000000003" if countyname=="Prince of Wales-Hyder Census Area" /* the new series id is for Prince of Wales-Outer Ketchikan Census Area, AK */


merge m:1 series_id using "lau9599.dta"
drop if _merge==2
drop _merge

merge m:1 series_id using "lau0004.dta"
drop if _merge==2
drop _merge

merge m:1 series_id using "lau0509.dta"
drop if _merge==2
drop _merge

keep value* countyfip statefip
**2 places in AK are doubled

replace countyfip = statefip*1000+countyfip
bysort countyfip: gen n=_n
replace countyfip=2130.5 if countyfip==2130 &  n==2
replace countyfip=2230.5 if countyfip==2230 & n==2
drop n
drop if countyfip==.
reshape long value countyfip_3, i(countyfip) j(quarter)

replace countyfip=2130 if countyfip==2130.5
replace countyfip=2230 if countyfip==2230.5
bysort countyfip quarter: gen n=_n
reshape wide value, i(countyfip quarter) j(n)
gen unemp_rate=(value1+value2)/2
replace unemp_rate=value1 if value2==.

keep countyfip unemp_rate quarter
sort countyfip
**miami code change
replace countyfip=12086 if countyfip==12025

gen year=floor(quarter/100)
replace quarter=quarter-(year*100)

reshape wide unemp_rate, i(countyfip year) j(quarter)
drop unemp_rate3 unemp_rate9

keep if year>=1999 & year<=2009

reshape wide unemp_rate6  unemp_rate12, i(countyfip) j(year)
save "county_urates.dta", replace


*--------------------------------------------------------------------------------*
*----------Read in HPI Data (MSA/RURAL STATE --BASED ON 2013 CBSA)----------*
*----------------------------------------------------------------------------*

**first, county links

import excel "List1.xls", sheet("List 1") firstrow clear
keep CBSACode MetroDivisionCode CSACode FIPSStateCode FIPSCountyCode

rename CBSACode msa 
rename MetroDivisionCode msad 
rename CSACode csa 
rename FIPSStateCode statefip 
rename FIPSCountyCode countyfip

sort statefip countyfip

drop if countyfip==""
destring(countyfip), replace
destring(statefip), replace
destring(csa), replace
destring(msad), replace
destring(msa), replace

save "msa_county.dta", replace

**msa data
* quarterly
clear
insheet using "HPI_AT_metro.csv", comma

rename v1 msa_name
rename v2 msa
rename v3 year
rename v4 q
rename v5 hpi_q
drop v6

replace hpi_q="" if hpi_q=="-"
destring(hpi_q), replace
ren hpi_q hpi_m_q
keep if year>=1999
replace q=12 if q==4
replace q=9 if q==3
replace q=6 if q==2
replace q=3 if q==1


gen quarter = year*100+q
drop year q
reshape wide hpi_m, i(msa) j(quarter)

save "hpi_msa_qtr.dta", replace


**rural state data
*import excel "indexes_nonmetro_thru_13q2.xls", sheet("hpi_stsbal") firstrow clear
import excel "HPI_AT_nonmetro.xls", sheet("hpi_stsbal") cellrange(A4) clear

ren A state_code
ren B year
ren C q
ren D hpi_q
drop E /* standard error */

destring year q hpi_q, replace
merge m:1 state_code using "state codes.dta"
drop if _merge==2
drop _merge
ren hpi_q hpi_nm_q

replace q=12 if q==4
replace q=9 if q==3
replace q=6 if q==2
replace q=3 if q==1

gen quarter = year*100+q

keep if year>=1999

rename fips statefip 
keep statefip quarter hpi_nm
reshape wide hpi_nm, i(statefip) j(quarter)
save "hpi_nonmetro_qtr.dta", replace



insheet using "all_geocodes_v2009.txt", clear
drop if v2==""
keep if v1=="050"
rename v7 countyname
destring(v2), replace
destring(v3), replace
rename v3 countyfip
rename v2 statefip
keep countyname statefip countyfip


/* create quarterly dataset */
merge m:1 statefip countyfip using "msa_county.dta"
**drop puerto rico
drop if statefip==72
assert _merge==1 | _merge==3
drop _merge

drop csa
**hpi uses msads
replace msa=msad if msad!=.


merge m:1 msa using "hpi_msa_qtr.dta"
assert _merge!=2
drop _merge


merge m:1 statefip using "hpi_nonmetro_qtr.dta"
assert _merge!=2
drop _merge

keep hpi_* statefip countyfip

reshape long hpi_m_q hpi_nm_q, i(statefip countyfip) j(quarter)

gen hpi=hpi_nm
replace hpi=hpi_m if hpi_m!=.
drop hpi_m hpi_nm
ren hpi hpi_q
save "hpi_qtr.dta", replace 



***----------------------------------------------------------------------***
**----------------COUNTY LEVEL HOUSE VALUES -----------------------***
**-----------------------------------------------------------------------***

**these are large files, which can be downloaded directly from Census (median home values by county in 2000), We provide the processed files, code to process raw files is commented out.

/* insheet using "us00060.uf3", comma clear
rename v1 fileid 
rename v2 stusab
rename v3 charitr
rename v5 logrecno

**median home value for owner-occupied
rename v252 med_homevalue

drop v*

save "homevalues_raw.dta"


set more off

infix str fileid1 1-6 str stusab1 7-8 sumlev 9-11 geocomp 12-13 charitr1 14-16 logrecno 19-25 state 30-31 county 32-34 tract 56-61 blkgrp 62 block 63-66  using "usgeo.uf3", clear

merge 1:1 logrecno using "homevalues_raw.dta"
assert _merge==3
drop _merge

foreach n in fileid stusab charitr {
assert `n'1==`n'
drop `n'
}


**keep counties
keep if sumlev==50
**keep county average(==0) (rural==43, urban==1)
keep if geocomp==0

keep state county med_homevalue 
rename state statefip
rename county countyfip

save "homevalues.dta", replace */

/* QUARTERLY */
use "hpi_qtr.dta", clear
 merge m:1 statefip countyfip using "homevalues.dta"
 drop if _merge==2
 drop _merge
 
merge m:1 quarter using "cpi_qtr.dta"
drop if _merge==2
drop _merge

**put in 2013q3 $s
gen cpi=224.988/cpihpi
gen hpi=hpi_q*cpi


gen q0003=quarter==200003
gen hpi0003=hpi*q0003
bysort statefip countyfip: egen hpi200003=total(hpi0003)
assert hpi200003==hpi if quarter==200003
drop hpi0003 q0003

**home price in year x= homeprice2000+[homeprice2000*(percentage change in hpi)]
**note that values can be less than one (ie, depreciation or when scaling backwards)
gen apprec=1+[(hpi-hpi200003)/hpi200003]

gen med_homeprice=apprec*med_homevalue
keep statefip countyfip med_homeprice quarter

replace countyfip=statefip*1000+countyfip

gen year=floor(quarter/100)
replace quarter=quarter-(year*100)

reshape wide med_homeprice, i(countyfip year) j(quarter)
drop med_homeprice3 med_homeprice9

keep if year>=1999 & year<=2009

reshape wide med_homeprice6 med_homeprice12, i(countyfip) j(year)

save "county_hprices.dta", replace



***-----------------------------------***
***Merge urate and hprice data and match to zipcodes***
**------------------------------------***


***match to zipcodes**
**this is census 2000 zcta5 to county crosswalks, with population weights
use "zip_county.dta", clear
**zip weights are population weights
keep zipcode county zipweight
rename county countyfip
merge m:1 countyfip using "county_hprices.dta"
keep if _merge==3
drop _merge
merge m:1 countyfip using "county_urates.dta"
keep if _merge==3
drop _merge
sort zipcode

reshape long unemp_rate6 unemp_rate12 med_homeprice6 med_homeprice12, i(zipcode countyfip) j(year)

**check weights sum to 1**
bysort year zipcode: egen tweight=total(zipweight)
replace zipweight=zipweight/tweight
drop tweight

bysort year zipcode: egen m_unemp_rate6=total(unemp_rate6*zipweight)
bysort year zipcode: egen m_unemp_rate12=total(unemp_rate12*zipweight)
bysort year zipcode: egen m_med_homeprice6=total(med_homeprice6*zipweight)
bysort year zipcode: egen m_med_homeprice12=total(med_homeprice12*zipweight)

bysort year zipcode: gen n=_n
keep if n==1
drop n


keep zipcode year m_*

foreach n in unemp_rate med_homeprice {
rename m_`n'6 `n'6
rename m_`n'12 `n'12
}

label var unemp_rate6 "County Unemployment Rate, BLS, June"
label var unemp_rate12 "County Unemployment Rate, BLS, Dec"
label var med_homeprice6 "County Median Home Price, FHFA+Census, June"
label var med_homeprice12 "County Median Home Price, FHFA+Census, Dec"

keep if year<=2008

gen zip=zipcode

save "$datafolder2/county_hprice_urate_zips.dta", replace

